// User-configurable folder locations. Every file reference below is built by
// joining one of these globals, a path separator and a file name, so each
// global should hold a folder path without a trailing separator. They are
// empty placeholders here and must be filled in before the script is run.
global data ""  // path for source data
global temp ""  // path for intermediary data
global final ""  // path for final replication sample
global tables ""  // path for exporting tables
global figures ""  // path for exporting figures

/*------------------------------------------------------------------------------
This do file processes the raw survey data (CFPS, CGSS, CHFS) to generate the 
cleaned data for empirical analysis. 

Input:
	cfps2010family.dta: CFPS 2010 wave, family module;
	cfps2010adult.dta: CFPS 2010 wave, adult module.
	cgss2008.dta: CGSS 2008 wave;
	cgss2010.dta: CGSS 2010 wave;
	cgss2011.dta: CGSS 2011 wave;
	cgss2012.dta: CGSS 2012 wave;
	cgss2013.dta: CGSS 2013 wave.
	chfs2011_ind.dta: CHFS 2011 wave, individual module;
	chfs2011_hh.dta: CHFS 2011 wave, household module;
	chfs2011_city.dta: CHFS 2011 wave, city module;
	chfs2013_ind.dta: CHFS 2013 wave, individual module;
	chfs2013_hh.dta: CHFS 2013 wave, household module;
	chfs2013_city.dta: CHFS 2013 wave, city module.
	famine-intensity-estc.dta: Famine intensity for 2,727 counties (read from the final-sample folder, not the source-data folder).

Output:
	cfps10_sample.dta: To replicate Table S7, S12;
	cgss101213_sample.dta: To replicate Table S7;
	cgss08_sample.dta: To replicate Table S11, S12;
	cgss11_sample.dta: To replicate Table S10, S11;
	chfs11_sample.dta: To replicate Table S7, S8;
	chfs13_sample.dta: To replicate Table 2, S9.
------------------------------------------------------------------------------*/

****************************
// CFPS
****************************

* Family module: load only the family id and the two ff3 items that the adult
* sample later uses to build the stock and investment indicators, and store
* them as an intermediary file for the merge.
use fid ff3_s_1 ff3_s_2 using "$data/cfps2010family.dta", clear
save "$temp/Family_WRKG.dta", replace

** CFPS2010
* Adult module: builds the owner, stock and investment indicators, attaches the
* county-level famine intensity and saves the CFPS analysis sample.
use "$data/cfps2010adult.dta", clear

rename gender male
rename qg303 work_nature
rename qg305 orgn
rename qg307code occup_code

* qm706: codes -8 and 6 become missing; codes 3, 4, 5 are remapped to 4, 5, 3.
* NOTE(review): -1 and -2 are not set to missing here, unlike the job
* variables below. Confirm against the codebook whether qm706 can take them.
recode qm706 (-8 6 = .) (3 = 4) (4 = 5) (5 = 3), gen(corruption)
label var corruption "Use of bribery"

recode work_nature (-8 -2 -1 = .)
recode orgn (-8 -2 -1 = .)
recode occup_code (-8 -7 -2 -1 = .)

* Attach the family-level ff3 items; family records that match no adult are dropped.
merge m:1 fid using "$temp/Family_WRKG.dta"
drop if _merge == 2
drop _merge

gen survey = "cfps2010"

compress

* Owner flag: orgn is 7 or 11 and work_nature is 1, or occupation code is 10500.
* NOTE(review): the meaning of these codes is not visible here; verify in the codebook.
gen private_ep = (orgn == 7 | orgn == 11)
gen private_boss = (private_ep == 1 & work_nature == 1)
replace private_boss = 1 if occup_code == 10500

* Stata ranks missing above every number, so a bare test of ff3_s_1 >= 0 is
* also true when ff3_s_1 is missing. Require a non-missing, non-negative
* answer so that adults without a family record or with a negative
* (non-response) code stay missing instead of being coded 0.
gen byte stock = 0 if ff3_s_1 >= 0 & !mi(ff3_s_1)
replace stock = 1 if ff3_s_1 == 1 | ff3_s_2 == 1

* Investment is 1 when the first ff3 answer is a valid code other than 78.
* The earlier form (replace ... if ff3_s_1 != 78) also set investment to 1 for
* negative codes and for missing values.
* NOTE(review): 78 is presumably the none-of-the-listed-products code; confirm.
gen byte investment = (ff3_s_1 != 78) if ff3_s_1 >= 0 & !mi(ff3_s_1)

gen wave = 2010
compress

* Keep only adults whose county appears in the famine-intensity file.
merge m:1 uid2000 using "$final/famine-intensity-estc.dta"
keep if _merge == 3
drop _merge

rename uid2000 countyid
keep stock investment private_boss birthyr survey countyid wave ///
	rltv_death_rate_n_1990 corruption male
label var stock "Share investment"
label var investment "Financial investment"
label var private_boss "Owner"

save "$final/cfps10_sample.dta", replace


****************************
// CGSS
****************************

//CGSS 2010 2012 2013
* Pool the three waves. Each appended wave arrives with wave missing, which is
* then filled with that wave's survey year.
use "$data/cgss2010.dta", clear
keep a2 a3a a679 a671 a59a uid2000
generate wave = 2010
append using "$data/cgss2012.dta", keep(a2 a3a a679 a671 a59a uid2000)
replace wave = 2012 if wave == .
append using "$data/cgss2013.dta", keep(a2 a3a a679 a671 a59a uid2000)
replace wave = 2013 if wave == .

* Birth year: codes -2 and -3 are treated as missing.
rename a3a birthyr
replace birthyr = . if inlist(birthyr, -2, -3)

generate male = (a2 == 1)

* Stock copies a671 except where a671 is -3; investment reverses a679.
generate stock = a671 if a671 != -3
generate investment = 1 - a679

* Owner flag from a59a after its negative codes are set to missing.
replace a59a = . if inlist(a59a, -1, -2, -3)
generate private_boss = (a59a == 1) if a59a != .

generate survey = "cgss"

* Keep only respondents whose county appears in the famine-intensity file.
merge m:1 uid2000 using "$final/famine-intensity-estc.dta"
keep if _merge == 3
drop _merge

rename uid2000 countyid
keep birthyr male stock investment private_boss rltv_death_rate_n_1990 ///
	survey wave countyid

save "$final/cgss101213_sample.dta", replace


// CGSS-2008
* Builds the health, resilience, guanxi and send-down variables, attaches the
* county-level famine intensity and saves the CGSS 2008 analysis sample.
use "$data/cgss2008.dta", clear

rename a2 birthyr
gen wave = 2008
gen male = (a1 == 1)
gen health = a19
drop if c3 == 1
label variable health "Health condition"

* resil3 and resil5: items d403 and d405 with answer 6 left missing and the
* remaining answers reversed as 5 minus the answer.
foreach i in 3 5 {
	gen resil`i' = 5 - d40`i' if d40`i' != 6
}

label var resil3 "Even if not well"
label var resil5 "Even if long time"

* Guanxi index: answer 6 on the six e3 items is set to missing, then predict
* with a single new variable stores the score on the first principal component.
recode e3a e3b e3c e3d e3e e3f (6 = .)
pca e3a e3b e3c e3d e3e e3f
predict guanxi

label var guanxi "Use of guanxi"
label variable male "Male"

* Keep only respondents whose county appears in the famine-intensity file.
merge m:1 uid2000 using "$final/famine-intensity-estc.dta"
keep if _merge == 3
drop _merge

* send_down is 1 when any of the three c17c items equals 9, and is left
* missing when c17c01 is missing.
g byte send_down = (c17c01 == 9 | c17c02 == 9 | c17c03 == 9) if !mi(c17c01)
gen famine_sdown = rltv_death_rate_n_1990 * send_down
label variable send_down "Send down"
label variable famine_sdown "Intensity x send down"

rename uid2000 countyid

keep countyid male health resil3 resil5 send_down famine_sdown ///
	rltv_death_rate_n_1990 wave guanxi birthyr

* The closing quote used to sit before the .dta extension; the whole file name
* is now inside the quotes, like every other save in this script.
save "$final/cgss08_sample.dta", replace


// CGSS2011
* Builds the attitude and resilience variables, attaches the county-level
* famine intensity and saves the CGSS 2011 analysis sample.
use "$data/cgss2011.dta", clear
rename a3a birthyr
recode birthyr (-2 -3 = .)
gen male = a2 == 1
gen wave = 2011

* Negative codes are treated as missing.
* NOTE(review): ma13 and ma14 are cleaned here but are not in the keep list
* below, so they never reach the saved sample. Confirm whether that is intended.
replace ma13 = . if ma13 < 0
replace ma14 = . if ma14 < 0
replace a43g = . if a43g < 0

* d17d and d17e: positive answers other than 8 are reversed as 6 minus the answer.
gen d17_d = 6 - d17d if d17d > 0 & d17d != 8
gen d17_e = 6 - d17e if d17e > 0 & d17e != 8

* d31a is reversed after its negative codes are removed; d31b is only cleaned.
replace d31a = . if d31a < 0
replace d31a = 6 - d31a
replace d31b = . if d31b < 0

label var a43g "Power"
label var d17_d "Confidence"
label var d17_e "Difficulties"
label var d31a "Control"
label var d31b "Confidence"

* Resilience items: positive answers are reversed as 5 minus the answer.
gen resil3 = 5 - d35a if d35a > 0
gen resil5 = 5 - d35c if d35c > 0

label var resil3 "Even if not well"
label var resil5 "Even if long time"

* Keep only respondents whose county appears in the famine-intensity file.
merge m:1 uid2000 using "$final/famine-intensity-estc.dta"
keep if _merge == 3
drop _merge

rename uid2000 countyid

keep countyid male birthyr rltv_death_rate_n_1990 resil* a43g d17_d d17_e ///
	d31a d31b wave

* The closing quote used to sit before the .dta extension; the whole file name
* is now inside the quotes, like every other save in this script.
save "$final/cgss11_sample.dta", replace


****************************
// CHFS
****************************
//CHFS 2011
* Individual module merged with the household and city modules and with the
* county-level famine intensity; saves the CHFS 2011 analysis sample.
use "$data/chfs2011_ind.dta", clear

**Demographic information
rename a2005 birthyr
rename a3000 employed
gen male = (a2003 == 1) if a2003 ~= .
gen minority = (a2011 ~= 1) if a2011 ~= .
gen primary = (a2012 == 2 | a2012 == 3) if a2012 ~= .
gen highschool = (a2012 == 4 | a2012 == 5) if a2012 ~= .
* Stata ranks missing above every number, so without the guard a missing
* a2012 would satisfy a2012 >= 6 and be coded as college = 1.
gen college = (a2012 >= 6) if a2012 ~= .
* a2005 was renamed to birthyr above, so age must be computed from birthyr.
* NOTE(review): this line previously used 2013 as the reference year. It is
* set to 2011 to match the wave; confirm the analysis does not rely on 2013.
gen age = 2011 - birthyr

gen time_preference = (a4011 == 1) if a4011 ~= .
label var time_preference "Time preference"

**match with the household investment
merge m:1 hhid using "$data/chfs2011_hh.dta"
keep if _merge == 3
drop _merge

drop if a2010 == 2  //Not chinese
drop if a2018 == 2 //non-local

gen private_boss = (a3003 == 2)
gen byte stock = (d3101 == 1)
replace stock = . if mi(d3101)

* Investment is 1 when any of the listed household items indicates a holding.
gen byte investment = 0
replace investment = 1 if d3101 == 1 | (d4103_1 > 0 & !mi(d4103_1)) | ///
	(d5103 > 0 & !mi(d5103)) | d6103 == 1 | d7103 == 1

gen survey = "chfs"
gen wave = 2011
rename a2025b health
replace health = (6 - health)

* Only records with a2001 equal to 1 are kept, so respond is 1 for every row saved.
gen respond = (a2001 == 1)
keep if a2001 == 1

merge m:1 hhid using "$data/chfs2011_city.dta", keepusing(rural city_lab)
keep if _merge == 3
drop _merge
gen urban = 1 - rural
rename city_lab cityid

* Keep only respondents whose county appears in the famine-intensity file.
merge m:1 uid2000 using "$final/famine-intensity-estc.dta"
keep if _merge == 3
drop _merge

rename uid2000 countyid

keep survey countyid wave male birthyr time_preference respond private_boss ///
	stock investment health urban minority primary highschool college ///
	rltv_death_rate_n_1990 cityid age swgt

save "$final/chfs11_sample.dta", replace

//CHFS 2013
* Individual module merged with the household and city modules and with the
* county-level famine intensity. A city-level indicator for an above-median
* share of female respondents is then built and merged back before saving.

use "$data/chfs2013_ind.dta", clear

gen male = (a2003 == 1) if a2003 ~= .
gen birthyr = a2005
gen minority = (a2011 ~= 1) if a2011 ~= .
gen primary = (a2012 == 2 | a2012 == 3) if a2012 ~= .
gen highschool = (a2012 == 4 | a2012 == 5) if a2012 ~= .
* Stata ranks missing above every number, so without the guard a missing
* a2012 would satisfy a2012 >= 6 and be coded as college = 1.
gen college = (a2012 >= 6) if a2012 ~= .
gen age = 2013 - a2005

* Working and self-employment flags.
gen working = (a3000 == 1) if a3000 ~= .
replace working = 1 if a3003 ~= .
gen selfemp = (a3003 == 2) if a3003 ~= .
replace selfemp = 0 if selfemp == . & working == 0
replace selfemp = 1 if a3028 == 2

rename a2025b health
replace health = (6 - health)
gen respond = (a2001 == 1)

merge m:1 hhid_2013 using "$data/chfs2013_hh.dta"
keep if _merge == 3
drop _merge
gen risk_loving = (a4003 == 1 | a4003 == 2) if a4003 ~= .
* risk_spouse copies risk_loving for records with a2001 equal to 2. It has to
* be created after risk_loving exists; it was previously generated before the
* household merge, where risk_loving was not yet defined.
gen risk_spouse = risk_loving if a2001 == 2
gen family_business = (b2001 == 1) if b2001 ~= .

gen busi_owner = (selfemp == 1 & family_business == 1) if selfemp ~= . & family_business ~= .

merge m:1 hhid_2013 using "$data/chfs2013_city.dta", keepusing(rural city_lab)
keep if _merge == 3
drop _merge
gen urban = 1 - rural
rename city_lab cityid

* Keep only respondents whose county appears in the famine-intensity file.
merge m:1 uid2000 using "$final/famine-intensity-estc.dta"
keep if _merge == 3
drop _merge

keep a2001 health male age birthyr minority primary highschool college respond ///
	swgt hhid_2013 cityid urban risk_loving rltv_death_rate_n_1990 busi_owner ///
	risk_spouse
* Forward slashes are used for every path so the script also runs outside Windows.
save "$temp/chfs2013.dta", replace

* City-level indicator: flag households that contain a female respondent,
* reduce to one row per household, average within city, then mark cities at
* or above the median of the city shares.
gen x = (respond == 1 & male == 0)
bysort hhid_2013: egen hh_female = max(x)
duplicates drop hhid_2013, force
bysort cityid: egen hhshare_f = mean(hh_female)
label var hhshare_f "Share of female household head in the city"
duplicates drop cityid, force
egen hhshare_fmed = pctile(hhshare_f), p(50)
label var hhshare_fmed "Median share of female household head in the city"
gen hhshare_fhigh = (hhshare_f >= hhshare_fmed)
* From here on hhshare_f holds the 0/1 above-median indicator, not the share.
replace hhshare_f = hhshare_fhigh
keep cityid hhshare_f
save "$temp/hhshare_f2013.dta", replace

use "$temp/chfs2013.dta", clear
merge m:1 cityid using "$temp/hhshare_f2013.dta"

gen rltv_hhshare_f = rltv_death_rate_n_1990 * hhshare_f

keep a2001 health male age birthyr minority primary highschool college respond ///
	swgt cityid urban risk_loving rltv_death_rate_n_1990 busi_owner risk_spouse ///
	rltv_hhshare_f hhshare_f

save "$final/chfs13_sample.dta", replace

	


